#网页版百度文库下载器

from flask import Flask,render_template,request,jsonify,send_file,make_response
import requests,re,json
# WSGI application object; the route decorators below register views on it.
app = Flask(
    __name__,
)

@app.route("/index", methods=["GET"])
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    page = render_template("index.html")
    return page

# Seconds to wait on each upstream HTTP request before giving up.
_REQUEST_TIMEOUT = 15


def _alert(message):
    """Return a tiny HTML page that shows *message* in a browser alert box."""
    return "<script>alert('%s')</script>" % message


def _is_baidu_url(url):
    """Return True when *url* is an http(s) URL on baidu.com or a subdomain."""
    from urllib.parse import urlsplit

    try:
        parts = urlsplit(url)
    except ValueError:
        return False
    host = (parts.hostname or "").lower()
    on_baidu = host == "baidu.com" or host.endswith(".baidu.com")
    return parts.scheme in ("http", "https") and on_baidu


def _pick_doc_info(candidates):
    """Return the first candidate that decodes to a usable doc-info dict.

    *candidates* are the raw JSON strings captured from the page. The second
    match is tried first (it is the one this view has always used); the other
    matches are only fallbacks. Returns ``None`` when nothing is usable.
    """
    ordered = candidates[1:2] + candidates[:1] + candidates[2:]
    for raw in ordered:
        try:
            info = json.loads(raw)
        except ValueError:
            continue
        if not isinstance(info, dict):
            continue
        has_top_level = all(key in info for key in ("doc_id", "md5sum", "bcsParam"))
        inner = info.get("docInfo")
        if has_top_level and isinstance(inner, dict) and "docType" in inner:
            return info
    return None


def _render_pages(pages, doc_id, md5sum, bcs_params):
    """Concatenate the text / image URL of the first paragraph of each page."""
    pieces = []
    # enumerate gives the real position of each page; list.index() would
    # return the first *equal* page and pick the wrong zoom parameters.
    for position, page in enumerate(pages):
        paragraphs = page["parags"]
        if not paragraphs:
            continue
        first = paragraphs[0]
        kind = first["t"]
        if kind == "txt":
            pieces.append(first["c"])
        elif kind == "pic":
            pieces.append(
                "https://wkretype.bdimg.com/retype/zoom/" + doc_id
                + "?o=" + first["o"] + "&type=pic" + md5sum
                + bcs_params[position]["zoom"] + "\n\n"
            )
    return "".join(pieces)


@app.route("/download",methods=["POST"])
def download():
    """Fetch a Baidu Wenku page and return the document's text content.

    Form fields:
        url: address of the document page; must be an http(s) URL on a
            baidu.com host, otherwise an alert page is returned.
        download: optional; when it is anything other than ``"off"`` (the
            default) the text is sent as a ``.txt`` attachment named after
            the page title.

    Returns:
        A response whose body is the collected text (image paragraphs are
        emitted as image URLs), or a small ``<script>alert(...)</script>``
        page when the link is rejected, a request fails, the page carries no
        usable doc info, or the document type is ``"1"``.
    """
    page_url = request.form["url"]
    down = request.form.get("download", "off")

    # The URL comes straight from the client and is fetched server-side, so
    # restrict it to the site this tool targets instead of fetching anything.
    if not _is_baidu_url(page_url):
        return _alert("不支持的链接")

    headers = {
        "User-Agent":"Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Mobile Safari/537.36"
    }
    try:
        page_html = requests.get(page_url, headers=headers, timeout=_REQUEST_TIMEOUT).text
    except requests.RequestException:
        return _alert("获取文档失败")

    doc_info = _pick_doc_info(re.findall(r"docinfo: ({.*})", page_html, flags=re.I))
    if doc_info is None:
        return _alert("未找到文档信息")

    doc_id = doc_info['doc_id']
    md5sum = doc_info['md5sum']
    doc_type = doc_info['docInfo']['docType']
    bcs_params = doc_info['bcsParam']

    # Fall back to the document id when the page has no <title> element.
    title_match = re.search(r"<title>(.*)</title>", page_html)
    title = title_match.group(1) if title_match else str(doc_id)

    if doc_type == "1":
        return _alert("不支持的类型")

    # Every bcsParam entry contributes its "merge" value followed by "_".
    doc_range = "".join(bcs['merge'] + "_" for bcs in bcs_params)
    merge_url = "https://wkretype.bdimg.com/retype/merge/" + doc_id + "?" + md5sum + "&range=" + doc_range
    if doc_type != "3":
        # Types other than "3" (ppt) also send paging parameters; the row
        # count is one more than the number of bcsParam entries.
        merge_url += "&pn=1&rn=" + str(len(bcs_params) + 1)

    try:
        pages = json.loads(requests.get(merge_url, headers=headers, timeout=_REQUEST_TIMEOUT).text)
    except (requests.RequestException, ValueError):
        return _alert("获取文档失败")
    if not isinstance(pages, list):
        return _alert("获取文档失败")

    content = _render_pages(pages, doc_id, md5sum, bcs_params)

    response = make_response(content.encode("utf-8"))
    if down == "off":
        return response

    from urllib.parse import quote

    # RFC 6266 "filename*" carries the UTF-8 title percent-encoded, so titles
    # with spaces, semicolons or non-ASCII characters survive; the plain
    # "filename" is an ASCII fallback for clients without filename* support.
    encoded_name = quote(title + ".txt", safe="")
    response.headers['Content-Type'] = "text/plain; charset=utf-8"
    response.headers["Content-Disposition"] = (
        "attachment; filename=\"download.txt\"; filename*=UTF-8''" + encoded_name
    )
    return response



if __name__ == "__main__":
    # Run Flask's built-in server bound to all interfaces, debug mode off.
    app.run(debug=False, host="0.0.0.0")